ggsave <- function(...) {message("skipped call to ggsave")}
read DNA quality metrics from scbs output (naive lineage)
mofa_out <- read_tsv("/mnt/volume/20-11-03_scNMT-SVZ2/21-12-01_scNMT-cell-metadata.tsv", show_col_types=F) %>%
dplyr::select(sample, cell_id_dna, run_date_dna, run_id_dna, ilse_no_dna, read_count_dna,
experiment_id, experiment, cell_id_rna, run_date_rna, run_id_rna, ilse_no_rna, read_count_rna,
n_obs_cpg, global_meth_frac_cpg, celltype_tissue)
wt_celltype_labels <- mofa_out %>%
dplyr::select(sample, wt_celltype_label=celltype_tissue)
dread DNA metadata (ischemia data)
read_cpg_stats <- function(cell_stats_path) {
# read the cell_stats.csv generated by scbs
read_csv(cell_stats_path, show_col_types=F) %>%
mutate(cell_id_dna = str_remove(cell_name, "_R1R2_dedup.NOMe.[CG]p[CG]")) %>%
dplyr::select(cell_id_dna, n_obs_cpg=n_obs, global_meth_frac_cpg=global_meth_frac)
}
read_seq_stats_dna <- function(meta_tsv, experiment_id) {
# read the *meta.tsv file produced by the DKFZ core facility scritps
read_tsv(meta_tsv, show_col_types=F) %>%
dplyr::filter(!str_detect(FASTQ_FILE, "[Uu]ndetermined")) %>%
mutate(sample = paste(SAMPLE_NAME, experiment_id),
experiment_id = experiment_id,
cell_id_dna = str_remove(FASTQ_FILE, "_R[12].fastq.gz"),
run_date_dna = RUN_DATE,
run_id_dna = RUN_ID,
ilse_no_dna = ILSE_NO,
read_count_dna = READ_COUNT) %>%
dplyr::select(sample, cell_id_dna, run_date_dna, run_id_dna, ilse_no_dna, read_count_dna) %>%
distinct()
}
dna_stats <- read_cpg_stats("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/05_sparse-matrix/CpG/cell_stats.csv")
dna_meta <- bind_rows(
read_seq_stats_dna(
"/mnt/A290/220121_NB551305_0418_AHY3JMBGXK_scNMT-ischemia-48h/DNA/all_meta.tsv",
experiment_id = "pI"),
read_seq_stats_dna(
"/mnt/A290/211122_NB551305_0406_AH3VK3AFX3_ischemia-GLAST-SVZ-Striatum-scNMT/DNA/all_meta.tsv",
experiment_id = "pH"),
read_seq_stats_dna(
"/mnt/A290/220714_NB551305_0439_AHV7H2AFX3_scNMT-ifnagrKO-ischemia48h-svz-glast/DNA/all_meta_suffixed.tsv",
experiment_id = "pJ"),
read_seq_stats_dna(
"/mnt/A290/221110_NB551305_0446_AH37YFAFX5_scNMT-ifnagrKO-ischemia48h-Str-glast/DNA/all_meta.tsv",
experiment_id = "pJ"),
read_seq_stats_dna(
"/mnt/A290/220520_NB551305_0432_AHNJJKAFX3_scNMT-naive-astro-Ctx-Str/DNA/all_meta.tsv",
experiment_id = "pL")
)
## Warning: One or more parsing issues, see `problems()` for details
## One or more parsing issues, see `problems()` for details
dna_stats %>%
right_join(dna_meta, by="cell_id_dna") %>%
write_tsv("/mnt/volume/22-04-11_scNMT-ischemia/22-11-21_ischemic-WT-KO-cells-DNA-metadata.tsv")
read RNA metadata
read_seq_stats_rna <- function(meta_tsv, experiment_id) {
read_tsv(meta_tsv, show_col_types=F) %>%
mutate(cell_id_dna = str_remove(FASTQ_FILE, "_R[12].fastq.gz"),
sample = paste(SAMPLE_NAME, experiment_id),
experiment_id = experiment_id) %>%
mutate(
cell_id_rna = str_remove(FASTQ_FILE, "_R[12].fastq.gz"),
run_date_rna = RUN_DATE,
run_id_rna = RUN_ID,
ilse_no_rna = ILSE_NO,
read_count_rna = READ_COUNT
) %>%
dplyr::select(sample, cell_id_rna, run_date_rna, run_id_rna, ilse_no_rna, read_count_rna) %>%
distinct()
}
rna_meta <- bind_rows(
read_seq_stats_rna(
"/mnt/A290/220121_NB551305_0418_AHY3JMBGXK_scNMT-ischemia-48h/RNA/24221_meta.tsv",
"pI"
),
read_seq_stats_rna(
"/mnt/A290/211122_NB551305_0406_AH3VK3AFX3_ischemia-GLAST-SVZ-Striatum-scNMT/RNA/23809_meta.tsv",
"pH"
),
read_seq_stats_rna(
"/mnt/A290/220714_NB551305_0439_AHV7H2AFX3_scNMT-ifnagrKO-ischemia48h-svz-glast/RNA/all_meta_suffixed.tsv",
"pJ"
),
read_seq_stats_rna(
"/mnt/A290/221110_NB551305_0446_AH37YFAFX5_scNMT-ifnagrKO-ischemia48h-Str-glast/RNA/32497_meta.tsv",
"pJ"
)
)
## Warning: One or more parsing issues, see `problems()` for details
read_zUMIs <- function(rds_path, samples_txt, meta_tsv) {
rds <- readRDS(rds_path)
meta <- read_tsv(meta_tsv, show_col_types = F) %>% filter(READ == 1)
cell_names <- read_tsv(samples_txt, col_types="cccc") %>%
left_join(meta, by = c("r1"="FASTQ_FILE"))
bc2cellname <- cell_names %>% dplyr::select(BC, SAMPLE_NAME) %>% deframe()
rds$readcount$inex$all %>%
magrittr::set_colnames(bc2cellname[colnames(.)])
}
gene_id_to_symbol <- "/mnt/o_drive/Lukas/resources/id_maps/mm10ensembl99_gene-id_to_gene-symbol.tsv" %>%
read_tsv(col_types = "cc") %>%
deframe()
gene_symbol_to_id <- names(gene_id_to_symbol)
names(gene_symbol_to_id) <- unname(gene_id_to_symbol)
name2id <- "/mnt/o_drive/Lukas/resources/id_maps/mm10ensembl100_gene-id_to_gene-symbol-including-synonyms.tsv" %>%
read_tsv(col_types = "cc") %>%
deframe()
naive_mtx_hqmeth <- fread("/mnt/volume/20-11-03_scNMT-SVZ2/analysis/matrices/RNA.csv.gz", sep=",", data.table=F) %>%
column_to_rownames(var="sample") %>%
as.matrix() %>%
magrittr::set_colnames(unname(gene_symbol_to_id[colnames(.)])) %>%
magrittr::extract( , !is.na(colnames(.)))
# we already loaded these cells, don't need to load them twice:
str_astros_hq_meth <- tibble(sample = rownames(naive_mtx_hqmeth)) %>%
dplyr::filter(str_detect(sample, "^[Ss][Tt][Rr].+pE")) %>%
mutate(sample = str_replace(sample, "STRASTRO-B", "Str Astro-2 "),
sample = str_replace(sample, "STRASTRO", "Str Astro ")) %>%
pull(sample)
ss2_naive_str_mtx <- read_zUMIs(
"/mnt/volume/20-11-03_scNMT-SVZ2/RNA/SmartSeq2_StrAstro-NSC/01_zUMIs/zUMIs_output/expression/scNMT_StrAstro-NSC.dgecounts.rds",
"/mnt/volume/20-11-03_scNMT-SVZ2/RNA/SmartSeq2_StrAstro-NSC/00_raw-reads/reads_for_zUMIs.samples.txt",
"/mnt/A290/201130_NB551305_0286_AHK7WFAFX2_StrAstro-NSC-scNMTseq/RNA/20474_meta.tsv") %>%
t() %>% magrittr::set_rownames(paste(rownames(.), "pE")) %>%
magrittr::extract(str_detect(rownames(.), "^[Ss][Tt][Rr]"), ) %>% # only take striatal cells
magrittr::extract(rownames(.) %notin% str_astros_hq_meth, ) %>% # remove cells we already loaded
magrittr::extract(!str_detect(rownames(.), "bulk"), )
ss3_naive_str_mtx <- read_zUMIs(
"/mnt/volume/22-05-24_scNMT-naive-astro-Ctx-Str-TcfLef/RNA/01_zUMIs/zUMIs_output/expression/scNMT-naive-astro-Ctx-Str.dgecounts.rds",
"/mnt/volume/22-05-24_scNMT-naive-astro-Ctx-Str-TcfLef/RNA/00_raw-reads/reads_for_zUMIs.samples.txt",
"/mnt/A290/220520_NB551305_0432_AHNJJKAFX3_scNMT-naive-astro-Ctx-Str/RNA/28249_meta.tsv") %>%
t() %>% magrittr::set_rownames(paste(rownames(.), "pL")) %>%
magrittr::extract(str_detect(rownames(.), "^[Ss][Tt][Rr]"), )
naive_mtx <- Matrix.utils::rBind.fill(
naive_mtx_hqmeth,
ss2_naive_str_mtx,
ss3_naive_str_mtx) %>%
as.matrix() %>%
magrittr::extract( , colnames(.)[colnames(.) %in% names(gene_id_to_symbol)])
peek(naive_mtx)
## ENSMUSG00000000001 ENSMUSG00000000003 ENSMUSG00000000028
## ASTRO1 pA 0 0 0.000
## ASTRO11 pA 183 0 0.000
## ASTRO14 pA 449 0 0.000
## ASTRO16 pA 0 0 5.954
## ASTRO17 pA 368 0 0.000
## ASTRO19 pA 0 0 1.860
## ASTRO20 pA 186 0 0.000
## ASTRO22 pA 187 0 1.162
## ASTRO4 pA 0 0 62.000
## ASTRO6 pA 76 0 1.298
## ENSMUSG00000000037
## ASTRO1 pA 59
## ASTRO11 pA 0
## ASTRO14 pA 0
## ASTRO16 pA 0
## ASTRO17 pA 0
## ASTRO19 pA 0
## ASTRO20 pA 0
## ASTRO22 pA 0
## ASTRO4 pA 0
## ASTRO6 pA 0
ss3_isch_3wk_mtx <- read_zUMIs(
"/mnt/volume/21-12-10_scNMT-ischemia-striatum/RNA/01_zUMIs/zUMIs_output/expression/scNMT_GLAST-SVZ-Striatum.dgecounts.rds",
"/mnt/volume/21-12-10_scNMT-ischemia-striatum/RNA/00_raw-reads/reads_for_zUMIs.samples.txt",
"/mnt/A290/211122_NB551305_0406_AH3VK3AFX3_ischemia-GLAST-SVZ-Striatum-scNMT/RNA/23809_meta.tsv") %>%
t() %>% magrittr::set_rownames(paste(rownames(.), "pH")) #%>% magrittr::extract(rownames(.) %in% cell_info$sample, )
empty_wells <- c(glue::glue("Str_Glast_{313:326} pI"), glue::glue("Str_Glast_{337:384} pI"),
glue::glue("SVZ_Glast_{730:768} pI"), glue::glue("SVZ_Olig_{c(743, 744, 767, 768)} pI"))
ss3_isch_48h_mtx <- read_zUMIs(
"/mnt/o_drive/Lukas/22-01-25_scNMT-ischemia-striatum-48h/RNA/01_zUMIs/zUMIs_output/expression/scNMT_GLAST-SVZ-Striatum.dgecounts.rds",
"/mnt/o_drive/Lukas/22-01-25_scNMT-ischemia-striatum-48h/RNA/00_raw-reads/reads_for_zUMIs.samples.txt",
"/mnt/A290/220121_NB551305_0418_AHY3JMBGXK_scNMT-ischemia-48h/RNA/24221_meta.tsv") %>%
t() %>% magrittr::set_rownames(paste(rownames(.), "pI")) %>%
magrittr::extract(rownames(.) %notin% empty_wells, ) #%>% magrittr::extract(rownames(.) %in% cell_info$sample, )
ss3_ifnKO_isch_48h_mtx <- read_zUMIs(
"/mnt/o_drive/Lukas/22-07-28_scNMT-ifnagrKO-ischemia48h-svz-glast/RNA/01_zUMIs/zUMIs_output/expression/scNMT-ifnagrKO-ischemia48h-svz-glast.dgecounts.rds",
"/mnt/o_drive/Lukas/22-07-28_scNMT-ifnagrKO-ischemia48h-svz-glast/RNA/00_raw-reads/reads_for_zUMIs.samples.txt",
"/mnt/A290/220714_NB551305_0439_AHV7H2AFX3_scNMT-ifnagrKO-ischemia48h-svz-glast/RNA/all_meta_suffixed.tsv") %>%
t() %>% magrittr::set_rownames(paste(rownames(.), "pJ")) #%>% magrittr::extract(rownames(.) %in% cell_info$sample, )
## Warning: One or more parsing issues, see `problems()` for details
ss3_ifnKO_str_isch_48h_mtx <- read_zUMIs(
"/mnt/volume/22-11-11_scNMT-ifnagrKO-ischemia48h-Str-glast/RNA/01_zUMIs/zUMIs_output/expression/scNMT-ifnagrKO-ischemia48h-Str-glast.dgecounts.rds",
"/mnt/volume/22-11-11_scNMT-ifnagrKO-ischemia48h-Str-glast/RNA/00_raw-reads/reads_for_zUMIs.samples.txt",
"/mnt/A290/221110_NB551305_0446_AH37YFAFX5_scNMT-ifnagrKO-ischemia48h-Str-glast/RNA/32497_meta.tsv") %>%
t() %>% magrittr::set_rownames(paste(rownames(.), "pJ")) #%>% magrittr::extract(rownames(.) %in% cell_info$sample, )
isch_mtx <- Matrix.utils::rBind.fill(
ss3_isch_3wk_mtx,
ss3_isch_48h_mtx,
ss3_ifnKO_isch_48h_mtx,
ss3_ifnKO_str_isch_48h_mtx) %>%
as.matrix() %>%
magrittr::extract( , colnames(.)[colnames(.) %in% names(gene_id_to_symbol)])
#magrittr::set_colnames(unname(gene_id_to_symbol[colnames(.)]))
peek(isch_mtx)
## ENSMUSG00000000001 ENSMUSG00000000003 ENSMUSG00000000028
## SVZ_Olig_10 pH 0 0 0
## Str_Glast_81 pH 0 0 0
## SVZ_Glast_83 pH 0 0 0
## Str_Glast_58 pH 25 0 0
## SVZ_Glast_42 pH 0 0 0
## SVZ_Olig_7 pH 0 0 0
## SVZ_Glast_12 pH 24 0 0
## Str_Glast_55 pH 13 0 0
## Str_Glast_134 pH 84 0 1
## Str_Glast_163 pH 0 0 0
## ENSMUSG00000000031
## SVZ_Olig_10 pH 0
## Str_Glast_81 pH 0
## SVZ_Glast_83 pH 0
## Str_Glast_58 pH 0
## SVZ_Glast_42 pH 0
## SVZ_Olig_7 pH 0
## SVZ_Glast_12 pH 0
## Str_Glast_55 pH 0
## Str_Glast_134 pH 0
## Str_Glast_163 pH 0
obs_ifnko <- read_csv("/mnt/o_drive/Lukas/data_from_others/IfnKO_10x/ifnagrko_obs.csv", show_col_types=F) %>%
dplyr::rename(celltype_10X=celltype) %>%
mutate(tissue = if_else(celltype_10X %in% c("endothelial", "microglia", "neuron", "ob astrocyte", "oec"), "OB", "SVZ"))
barcode_to_celltype_ifnko <- obs_ifnko %>%
dplyr::select(barcode, celltype_10X) %>%
deframe()
var_ifnko <- read_csv("/mnt/o_drive/Lukas/data_from_others/IfnKO_10x/ifnagrko_var.csv", show_col_types=F, col_types="-cc") %>%
mutate(gene_id_simple = make.unique(str_extract(gene_id, "ENSMUSG\\d+"), sep="_"))
## New names:
## • `` -> `...1`
rna_mtx_raw_ifnko <- readMM("/mnt/o_drive/Lukas/data_from_others/IfnKO_10x/ifnagrko_raw_counts.mtx.gz") %>%
magrittr::set_rownames(obs_ifnko$barcode) %>%
magrittr::set_colnames(var_ifnko$gene_id_simple)
wt_cells <- obs_ifnko %>%
dplyr::filter(genotype == "WT" & celltype_10X %notin% c("doublet", "singleton")) %>%
pull(barcode)
rna_mtx_raw_10X <- rna_mtx_raw_ifnko %>%
magrittr::extract(wt_cells, !is.na(colnames(.))) %>%
as.matrix()
peek(rna_mtx_raw_10X)
## ENSMUSG00000051951 ENSMUSG00000025900
## AAACCCAGTCTCCTGT-WT_20mo_2021_001 0 0
## AAACGAAAGGTACTGG-WT_20mo_2021_001 0 0
## AAACGAACATGGCTGC-WT_20mo_2021_001 0 0
## AAACGAAGTAGCTGAG-WT_20mo_2021_001 0 0
## AAACGAATCACCCTCA-WT_20mo_2021_001 0 0
## AAACGCTAGGGCAGGA-WT_20mo_2021_001 0 0
## AAACGCTTCAGTCTTT-WT_20mo_2021_001 0 0
## AAAGAACAGAGATTCA-WT_20mo_2021_001 0 0
## AAAGAACAGAGCAGAA-WT_20mo_2021_001 0 0
## AAAGAACAGGATATGT-WT_20mo_2021_001 0 0
## ENSMUSG00000025902 ENSMUSG00000104328
## AAACCCAGTCTCCTGT-WT_20mo_2021_001 0 0
## AAACGAAAGGTACTGG-WT_20mo_2021_001 0 0
## AAACGAACATGGCTGC-WT_20mo_2021_001 0 0
## AAACGAAGTAGCTGAG-WT_20mo_2021_001 0 0
## AAACGAATCACCCTCA-WT_20mo_2021_001 0 0
## AAACGCTAGGGCAGGA-WT_20mo_2021_001 0 0
## AAACGCTTCAGTCTTT-WT_20mo_2021_001 0 0
## AAAGAACAGAGATTCA-WT_20mo_2021_001 0 0
## AAAGAACAGAGCAGAA-WT_20mo_2021_001 0 0
## AAAGAACAGGATATGT-WT_20mo_2021_001 0 0
find genes that are shared among the data sets
shared_genes <- intersect(colnames(rna_mtx_raw_10X), colnames(isch_mtx)) %>%
intersect(colnames(naive_mtx))
shared_genes_nmt <- intersect(colnames(naive_mtx), colnames(isch_mtx))
Get a log-normalized count matrix of scNMT transcriptomes (naive +
ischemia).
Remove off-target cells (microglia, endothelial cells,
oligodendrocytes…) determined in a previous iteration of this
script.
offtarget_cells <- scan("/mnt/volume/22-04-11_scNMT-ischemia/analysis/offtarget_cells.txt",
what=character(), sep="\n")
nmt_mtx <- rbind(
naive_mtx[, shared_genes_nmt],
isch_mtx[, shared_genes_nmt]
) %>% magrittr::extract(rownames(.) %notin% offtarget_cells, )
filter cells with low number of observed genes
n_genes <- enframe(rowSums(nmt_mtx > 0), name="sample", value="n_genes")
n_genes %>%
left_join(rna_meta, by="sample") %>%
mutate(experiment_id = if_else(
str_detect(sample, "Str_Glast_KO"),
paste(str_extract(sample, "p[A-Z]$"), "Str"),
str_extract(sample, "p[A-Z]$")
)) %>%
ggplot(aes(x = experiment_id, y = n_genes, color = n_genes <= 1500)) +
geom_quasirandom(size=.1)
nmt_mtx <- nmt_mtx[unname(rowSums(nmt_mtx > 0) > 1500), ]
nmt_mtx_lognorm <- nmt_mtx %>%
magrittr::divide_by(rowSums2(.)) %>%
magrittr::multiply_by(1e4) %>%
log1p()
genes_for_wilcox <- (colSums(nmt_mtx_lognorm > 0) / nrow(nmt_mtx_lognorm)) %>%
enframe(name="gene", value="cell_percent") %>%
dplyr::filter(cell_percent > 0.1 & gene %in% colnames(nmt_mtx_lognorm)) %>%
pull(gene)
seurat_objects <- list(
"scNMT" = CreateSeuratObject(t(nmt_mtx), assay="scNMT"),
"tenX_SVZ_naive" = CreateSeuratObject(t(rna_mtx_raw_10X), assay="tenX_SVZ_naive")
)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
set.seed(42) #420
n_features <- 2500 # 3000
n_pcs <- 30 #30
for (i in 1:length(seurat_objects)) {
seurat_objects[[i]] <- NormalizeData(seurat_objects[[i]], verbose = F)
seurat_objects[[i]] <- FindVariableFeatures(
seurat_objects[[i]],
selection.method="vst",
nfeatures=n_features,
verbose=F
)
}
anchors <- FindIntegrationAnchors(object.list = seurat_objects, dims = 1:n_pcs)
## Computing 2000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 7445 anchors
## Filtering anchors
## Retained 3182 anchors
integrated <- IntegrateData(anchorset = anchors, dims = 1:n_pcs)
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
DefaultAssay(integrated) <- "integrated"
# Run the standard workflow for visualization and clustering
integrated <- ScaleData(integrated, verbose = F)
integrated <- RunPCA(integrated, npcs = n_pcs, verbose = F)
integrated <- FindNeighbors(integrated, dims = 1:n_pcs, verbose = F)
integrated <- RunUMAP(integrated, reduction = "pca", dims = 1:n_pcs, verbose = F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
integrated <- FindClusters(integrated, resolution = 0.2, verbose = F)
integrated@meta.data <- integrated@meta.data %>%
mutate(dataset = case_when(
!is.na(nCount_scNMT) ~ "scNMT",
!is.na(nCount_tenX_SVZ_naive) ~ "tenX_SVZ_naive",
)) %>%
mutate(celltype_10X = unname(barcode_to_celltype_ifnko[rownames(.)]))
transfer_anchors <- FindTransferAnchors(
reference = integrated, query = seurat_objects[["scNMT"]],
dims = 1:n_pcs, reference.reduction = "pca"
)
## Projecting cell embeddings
## Finding neighborhoods
## Finding anchors
## Found 2393 anchors
## Filtering anchors
## Retained 2056 anchors
celltype_predictions <- TransferData(anchorset = transfer_anchors,
refdata = integrated$celltype_10X,
dims = 1:n_pcs) %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, predicted_ct=predicted.id, prediction_score=prediction.score.max) %>%
mutate(predicted_ct_hq = if_else(prediction_score > .3, predicted_ct, NA_character_))
## Finding integration vectors
## Finding integration vector weights
## Predicting cell labels
integrated_df <-
as_tibble(integrated@reductions$umap@cell.embeddings, rownames="sample") %>%
add_column(cluster = integrated@meta.data$seurat_clusters) %>%
mutate(
tissue = case_when(
str_detect(sample, "^SVZ") ~ "vSVZ",
str_detect(sample, "^[Ss][Tt][Rr]") ~ "striatum",
T ~ "vSVZ"
),
treatment = case_when(
str_detect(sample, "naive") ~ "naive",
str_detect(sample, "pH$") ~ "21 dpi",
str_detect(sample, "p[IJ]$") ~ "2 dpi",
T ~ "naive"
),
genotype = case_when(
str_detect(sample, "_KO\\d_") ~ "Ifnagr KO",
str_detect(sample, "pJ$") ~ "Ifnagr KO",
T ~ "wt"
),
experiment = case_when(
genotype != "wt" ~ as.character(glue::glue("{treatment} ({genotype})")),
T ~ treatment
),
method = if_else(str_detect(sample, "[ACTG]{8,}\\-"), "10X", "scNMT")
) %>%
mutate(tissue = fct_relevel(tissue, "vSVZ"),
treatment = fct_relevel(treatment, "naive", "2 dpi"),
genotype = fct_relevel(genotype, "wt"),
experiment = fct_relevel(experiment, "naive", "2 dpi", "21 dpi", "naive (Ifnagr KO)"),
celltype_10X = unname(barcode_to_celltype_ifnko[sample]),
UMAP_1 = -UMAP_1, # rotate plot if necessary
UMAP_2 = UMAP_2) %>%
left_join(celltype_predictions, by="sample")
integrated_df_10X <- integrated_df %>%
dplyr::filter(method == "10X") %>%
dplyr::select(-tissue, -treatment, -experiment)
integrated_df_nmt <- integrated_df %>%
dplyr::filter(method == "scNMT") %>%
mutate(cluster_label = case_when(
cluster == "0" ~ "NB",
cluster == "1" ~ "qNSC / astro",
cluster == "2" ~ "TAP",
cluster == "3" ~ "aNSC",
cluster == "4" ~ "OEC",
cluster == "5" ~ "neuron",
cluster == "6" ~ "oligo",
cluster == "8" ~ "reactive astro",
T ~ "other"
)) %>% # for the naive lineage, only keep those cells that were sorted by GLAST
dplyr::filter(!(treatment == "naive" & genotype == "wt" & tissue == "vSVZ" & !str_detect(sample, "pD$")))
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = cluster)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_jitter(size=.7, data = integrated_df_nmt) +
facet_grid(tissue ~ experiment) +
coord_fixed()
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = n_genes)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_jitter(size=.7, data = left_join(integrated_df_nmt, n_genes, by="sample")) +
facet_grid(tissue ~ experiment) +
coord_fixed()
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = predicted_ct_hq)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.7, data = integrated_df_nmt) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
guides(color = guide_legend(override.aes = list(size=2))) +
scale_color_aaas(na.value="gray40")
get a count matrix of the scNMT transcriptomes only
shared_genes_nmt <- intersect(colnames(naive_mtx), colnames(isch_mtx))
count_mtx_norm_nmt_ensg <- rbind(
as.matrix(naive_mtx[ , shared_genes_nmt]),
as.matrix(isch_mtx[ , shared_genes_nmt])
) %>%
magrittr::divide_by(rowSums2(.)) %>%
magrittr::multiply_by(1e4) %>%
log1p()
count_mtx_norm_nmt <- count_mtx_norm_nmt_ensg %>%
magrittr::set_colnames(unname(gene_id_to_symbol[colnames(.)]))
# from Liddelow et al. 2017
pan_ra_genes <- c("Lcn2", "Steap4", "S1pr3", "Timp1", "Hspb1", "Cxcl10", "Cd44", "Osmr", "Cp", "Serpina3n", "Aspg", "Vim", "Gfap")
ra1_genes <- c("H2-T23", "Serping1", "H2-D1", "Ggta1", "Iigp1", "Gbp2", "Fbln5", "Ugt1a1", "Fkbp5", "Psmb8", "Srgn", "Amigo2")
ra2_genes <- c("Clcf1", "Tgm1", "Ptx3", "S100a10", "Sphk1", "Cd109", "Ptgs2", "Emp1", "Slc10a6", "Tm4sf1", "B3gnt5", "Cd14")
ra_markers <- tibble(gene_symbol = c(pan_ra_genes, ra1_genes, ra2_genes),
gene_set = c(rep("Pan reactive", length(pan_ra_genes)),
rep("A1 specific", length(ra1_genes)),
rep("A2 specific", length(ra2_genes))))
reactive_signatures <-
tibble(
sample = rownames(count_mtx_norm_nmt),
pan_sig = ra_markers %>%
dplyr::filter(gene_set == "Pan reactive" & gene_symbol %in% colnames(count_mtx_norm_nmt)) %>%
pull(gene_symbol) %>%
count_mtx_norm_nmt[ , .] %>%
sparseMatrixStats::rowMeans2(),
a1_sig = ra_markers %>%
dplyr::filter(gene_set == "A1 specific" & gene_symbol %in% colnames(count_mtx_norm_nmt)) %>%
pull(gene_symbol) %>%
count_mtx_norm_nmt[ , .] %>%
sparseMatrixStats::rowMeans2(),
a2_sig = ra_markers %>%
dplyr::filter(gene_set == "A2 specific" & gene_symbol %in% colnames(count_mtx_norm_nmt)) %>%
pull(gene_symbol) %>%
count_mtx_norm_nmt[ , .] %>%
sparseMatrixStats::rowMeans2()
)
integrated_df_nmt %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2)) +
geom_point_rast(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2", color = "reactive\nastrocyte\nexpression\nsignature") +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(), #legend.position = "none",
panel.border = element_rect(colour="black", fill=NA, size=1))
#ggsave("/home/rstudio/22-11-21_reactive-astrocyte-sig.png", width=20, height=15, units="cm", dpi=500)
#ggsave("/home/rstudio/22-11-21_reactive-astrocyte-sig.svg", width=20, height=15, units="cm", fix_text_size=F)
integrated_df_nmt %>%
left_join(reactive_signatures, by="sample") %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = pan_sig)) +
geom_point_rast(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2", color = "reactive\nastrocyte\nexpression\nsignature") +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(), #legend.position = "none",
panel.border = element_rect(colour="black", fill=NA, size=1))
#ggsave("/home/rstudio/22-11-21_reactive-astrocyte-sig.png", width=20, height=15, units="cm", dpi=500)
#ggsave("/home/rstudio/22-11-21_reactive-astrocyte-sig.svg", width=20, height=15, units="cm", fix_text_size=F)
A1 astrocytes secrete a neurotoxin that induces rapid death of neurons and oligodendrocytes. A1 astrocytes have lost many characteristic astrocytic functions, including the ability to promote neuronal survival and outgrowth, promote synapse formation and function, and to phagocytose synapses and myelin debris.
integrated_df_nmt %>%
left_join(reactive_signatures, by="sample") %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = a1_sig)) +
geom_point_rast(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2", color = "A1\nastrocyte\nexpression\nsignature") +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1))
#ggsave("/home/rstudio/22-11-21_reactive-A1-sig.png", width=20, height=15, units="cm", dpi=500)
#ggsave("/home/rstudio/22-11-21_reactive-A1-sig.svg", width=20, height=15, units="cm", fix_text_size=F)
A2 reactive astrocytes are induced by ischaemia and strongly promote neuronal survival and tissue repair
integrated_df_nmt %>%
left_join(reactive_signatures, by="sample") %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = a2_sig)) +
geom_point_rast(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2", color = "A2\nastrocyte\nexpression\nsignature") +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(), #legend.position = "none",
panel.border = element_rect(colour="black", fill=NA, size=1))
#ggsave("/home/rstudio/22-11-21_reactive-A2-sig.png", width=20, height=15, units="cm", dpi=500)
#ggsave("/home/rstudio/22-11-21_reactive-A2-sig.svg", width=20, height=15, units="cm", fix_text_size=F)
lineage_cells <- integrated_df %>%
dplyr::filter(cluster %in% c(1, 2, 5, 4, 0)) %>%
pull(sample)
integrated_df_nmt %>%
left_join(as_tibble(Embeddings(integrated[, lineage_cells], "pca"), rownames="sample"), by="sample") %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = PC_3)) +
geom_point(size=.1, data = integrated_df_10X, color="gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
guides(color = guide_legend(override.aes = list(size=2)))
integrated_df_nmt %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = cluster)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
guides(color = guide_legend(override.aes = list(size=2)))
calculate pseudotime in integrated RNA PC space
# in principle, we have a complex trajectory with a branch (cell cycle).
# for simplicity, we force the trajectory to ignore this branch by assigning
# cycling cells to the nearest cluster, and by removing PC3 which captured
# the cell cycle.
integrated$merged_clusters <- integrated$seurat_clusters
integrated$merged_clusters[integrated$merged_clusters == 4] <- 5
sling_ptimes <- slingshot(
Embeddings(integrated[, lineage_cells], "pca")[, c(1:2, 4:n_pcs)],
clusterLabels=integrated[, lineage_cells]$merged_clusters,
start.clus="1",
end.clus="0") %>%
slingPseudotime() %>%
as_tibble(rownames="sample") %>%
dplyr::select(sample, ptime=Lineage1) %>%
mutate(ptime_rank = rank(ptime, ties.method="random"))
integrated_df_nmt %>%
left_join(sling_ptimes, by="sample") %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = ptime)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
coord_fixed() +
scale_color_viridis_c(option="turbo")
integrated_df_nmt %>%
left_join(sling_ptimes, by="sample") %>%
left_join(wt_celltype_labels, by="sample") %>%
arrange(!is.na(wt_celltype_label)) %>%
ggplot(mapping=aes(x = ptime_rank, y = 0, color = wt_celltype_label)) +
geom_jitter(width=0) +
scale_color_ct() +
scale_x_continuous(n.breaks = 20)
## Warning: Removed 213 rows containing missing values (geom_point).
integrated_df_nmt %>%
left_join(sling_ptimes, by="sample") %>%
left_join(wt_celltype_labels, by="sample") %>%
arrange(!is.na(wt_celltype_label)) %>%
ggplot(mapping=aes(x = ptime_rank, y = 0, color = predicted_ct_hq)) +
geom_jitter(width=0) +
scale_x_continuous(n.breaks = 20)
## Warning: Removed 213 rows containing missing values (geom_point).
quant_df <- integrated_df_nmt %>%
left_join(sling_ptimes, by="sample") %>%
mutate(celltype = case_when(
ptime_rank < 2700 ~ "qNSC1",
ptime_rank < 3600 ~ "qNSC2",
ptime_rank < 4200 ~ "aNSC",
ptime_rank < 5200 ~ "TAP",
!is.na(ptime_rank) ~ "neuroblast",
cluster == "7" ~ "reactive astrocyte"),
celltype_tissue = if_else(
tissue == "striatum" & celltype == "qNSC1",
"astrocyte (striatum)",
celltype),
celltype = fct_relevel(celltype, "reactive astrocyte", "qNSC1", "qNSC2",
"aNSC", "TAP", "neuroblast"),
celltype_tissue = fct_relevel(celltype_tissue, "reactive astrocyte", "astrocyte (striatum)",
"qNSC1", "qNSC2", "aNSC", "TAP", "neuroblast"))
quant_df %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = celltype_tissue)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
coord_fixed() +
scale_color_ct()
reactive_astros <- quant_df %>%
dplyr::filter(celltype == "reactive astrocytes") %>%
pull(sample)
quant_df %>%
dplyr::filter(!is.na(celltype)) %>%
group_by(celltype, treatment, tissue, genotype) %>%
tally() %>%
ggplot(aes(x = treatment, y = n, fill = celltype)) +
geom_col(position = "fill") +
facet_grid(tissue ~ genotype, scales="free_x", space="free_x") +
scale_fill_ct() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1)) +
labs(x = "", y = "proportion")
quant_df %>%
dplyr::filter(celltype %in% c("qNSC1", "qNSC2", "reactive astrocyte")) %>%
group_by(celltype, treatment, tissue, genotype) %>%
tally() %>%
ggplot(aes(x = treatment, y = n, fill = celltype)) +
geom_col(position = "fill") +
facet_grid(tissue ~ genotype, scales="free_x", space="free_x") +
scale_fill_ct() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1)) +
labs(x = "", y = "proportion")
quant_df %>%
dplyr::filter(celltype %notin% c("qNSC1", "reactive astrocyte")) %>%
group_by(celltype, treatment, tissue, genotype) %>%
tally() %>%
ggplot(aes(x = treatment, y = n, fill = celltype)) +
geom_col(position = "fill") +
facet_grid(tissue ~ genotype, scales="free_x", space="free_x") +
scale_fill_ct() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1)) +
labs(x = "", y = "proportion")
cell_df_total <- quant_df %>%
dplyr::filter(treatment == "2 dpi" & tissue == "striatum" &
celltype %in% c("qNSC1", "qNSC2", "reactive astrocyte")) %>%
arrange(celltype_tissue)
cell_df <- cell_df_total %>%
dplyr::filter(treatment == "2 dpi")
cell_df_ko <- cell_df_total %>%
dplyr::filter(treatment == "2 dpi (Ifnagr KO)")
cell_df %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = celltype_tissue)) +
geom_point_rast(size=.01, data = integrated_df_10X, color = "gray") +
geom_point(size=.25) +
coord_fixed() +
scale_color_ct()
#ggsave("/home/rstudio/22-04-13_reactive-marker-expression-legend.png", width=10, height=10, units="cm", dpi=500)
#ggsave("/home/rstudio/22-04-13_reactive-marker-expression-legend.svg", width=10, height=10, units="cm", fix_text_size=F)
color_cw <- function(mat, direction=1L) {
colors <- heatmaply::cool_warm(100)
if (direction < 0) {
colors <- rev(colors)
}
q1 <- quantile(mat, .03, na.rm = TRUE)
q99 <- quantile(mat, .97, na.rm = TRUE)
circlize::colorRamp2(seq(q1, q99, length = 100), colors, space="LAB")
}
color_viridis <- function(mat, ...) {
colors <- viridisLite::viridis(100, ...)
q1 <- quantile(mat, .01, na.rm = TRUE)
q99 <- quantile(mat, .99, na.rm = TRUE)
circlize::colorRamp2(seq(q1, q99, length = 100), colors, space="LAB")
}
# remove genes that are not in our count matrix
measured_ra_markers <- ra_markers %>%
dplyr::filter(gene_symbol %in% colnames(count_mtx_norm_nmt))
# remove genes that are not in any of our cells of interest
measured_ra_markers <- measured_ra_markers %>%
dplyr::filter(colSums(count_mtx_norm_nmt[cell_df$sample, gene_symbol]) > 0) %>%
mutate(gene_set = fct_relevel(gene_set, "Pan reactive"))
ha_cell <- HeatmapAnnotation(sample=cell_df$celltype_tissue,
col = list(sample = mycolorscheme2))
ha_gene <- HeatmapAnnotation(gene_set=measured_ra_markers$gene_set, which="row",
col = list(gene_set = c("Pan reactive"="black", "A1 specific"="red", "A2 specific"="ForestGreen")))
hm <- count_mtx_norm_nmt[cell_df$sample, measured_ra_markers$gene_symbol] %>%
as.matrix() %>% #scale() %>%
t() %>%
Heatmap(show_row_names=T, show_column_names=F, cluster_rows=F, cluster_columns=F,
bottom_annotation = ha_cell, right_annotation = ha_gene,
column_split = cell_df$celltype_tissue, row_split = measured_ra_markers$gene_set,
col=color_viridis(.), name="expression",
width=unit(18, "cm"), height=unit(8, "cm"), row_names_gp=gpar(fontsize=8))
#png("22-04-13_reactive-marker-expression-heatmap.png", width=35, height=25, units="cm", res=500)
draw(hm)
#dev.off()
#svglite("22-04-13_reactive-marker-expression-heatmap.svg", width=35, height=25, fix_text_size=F)
#draw(hm)
#dev.off()
shared_cols <- intersect(colnames(dna_meta), colnames(mofa_out))
dna_info <- bind_rows(
mofa_out %>% dplyr::select(all_of(shared_cols)),
dna_meta %>% dplyr::select(all_of(shared_cols))
) %>%
full_join(dna_stats, by="cell_id_dna")
dna_id_to_cellname <- dna_info %>%
dplyr::filter(!is.na(sample)) %>%
dplyr::select(cell_id_dna, sample) %>%
deframe()
integrated_df_nmt %>%
mutate(has_dna_data = sample %in% dna_info$sample) %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = has_dna_data)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
guides(color = guide_legend(override.aes = list(size=2)))
read_lmr_bed <- function(bed_path) {
read_tsv(bed_path, col_names = c("chrom", "start", "end"), show_col_types=F) %>%
mutate(chrom = str_remove(chrom, "chr"),
region = as.character(glue::glue("{chrom}:{start}-{end}"))) %>%
pull(region)
}
nsc_lmrs <- read_lmr_bed("/mnt/volume/20-11-03_scNMT-SVZ2/analysis/astro_gap/21-12-09_lineage-regions.bed")
astro_lmrs <- read_lmr_bed("/mnt/volume/20-11-03_scNMT-SVZ2/analysis/astro_gap/21-12-09_astrocyte-regions.bed")
vmr_meth_mtx <- data.table::fread(
"/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/07_matrix/naive-VMRs-CpG-matrices/methylation_fractions.csv.gz",
data.table = F) %>%
column_to_rownames(var="V1") %>%
as.matrix() %>%
magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>%
magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>%
magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)]))
vmr_meth_mtx %>% peek
## 1:3001279-3003379 1:3003729-3006419 1:3039409-3041589
## SVZ_Glast_93 pH NA NA NA
## NSC11 pC NA NA 0.3333333
## STRASTRO15 pE NA 1 NA
## SVZ_Glast_51 pH NA NA 1.0000000
## SVZ_Glast_D5_isch pJ NA NA 1.0000000
## NB13 pC 1 0 0.5000000
## SVZ_Glast_P13_isch pJ NA NA NA
## GLAST128 pD 1 NA NA
## GLAST326 pD NA 1 NA
## SVZ_Glast_679 pI NA 1 NA
## 1:3045619-3048239
## SVZ_Glast_93 pH 0
## NSC11 pC NA
## STRASTRO15 pE NA
## SVZ_Glast_51 pH NA
## SVZ_Glast_D5_isch pJ NA
## NB13 pC NA
## SVZ_Glast_P13_isch pJ NA
## GLAST128 pD NA
## GLAST326 pD NA
## SVZ_Glast_679 pI NA
lmr_df <- full_join(
enframe(rowMeans(vmr_meth_mtx[, astro_lmrs], na.rm=T), name="sample", value="mean_astrocyte_LMR_meth"),
enframe(rowMeans(vmr_meth_mtx[, nsc_lmrs], na.rm=T), name="sample", value="mean_lineage_LMR_meth"),
by = "sample"
) %>%
mutate(meth_score = mean_lineage_LMR_meth - mean_astrocyte_LMR_meth) %>%
right_join(quant_df, by="sample") %>%
arrange(!is.na(meth_score))
lmr_df %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, color = meth_score)) +
geom_point(size=.1, data = integrated_df_10X, color = "gray") +
geom_point(size=.5) +
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_color_gradient2(low=mycolorscheme2["aNSC"], high=mycolorscheme2["qNSC1"], mid="gray60") +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1))
#ggsave("/home/rstudio/22-04-13_astro-nsc-meth-score.png", width=20, height=15, units="cm", dpi=500)
#ggsave("/home/rstudio/22-04-13_astro-nsc-meth-score.svg", width=20, height=15, units="cm", fix_text_size=F)
lmr_df %>%
dplyr::filter(!is.na(meth_score)) %>%
slice_sample(prop=1) %>%
ggplot(mapping=aes(x = UMAP_1, y = UMAP_2, fill = meth_score)) +
geom_point(size=1, data = integrated_df_10X, color = "gray", fill = "gray") + # 0.1
geom_point(size=2, stroke=.1, shape=21, color = "black") + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_gradient2(low=mycolorscheme2["aNSC"],
high=mycolorscheme2["qNSC1"],
mid="white") + # gray60
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1))
#ggsave("/home/rstudio/22-04-13_astro-nsc-meth-score.png", width=20, height=15, units="cm", dpi=500)
#ggsave("/home/rstudio/22-04-13_astro-nsc-meth-score.svg", width=20, height=15, units="cm", fix_text_size=F)
lmr_df %>%
dplyr::filter(!is.na(ptime)) %>% # & treatment %in% c("naive", "2 dpi")) %>%
ggplot(aes(x = ptime_rank,
y = meth_score,
color = celltype_tissue)) +
geom_hline(yintercept = 0, size=.5, color="gray70") +
geom_point(size = .5) +
facet_grid(experiment ~ tissue) +
scale_color_ct() +
labs(x = "cells ordered by pseudotime", y = "methylation score") +
theme(panel.grid.major = element_blank(), legend.position="none")
## Warning: Removed 867 rows containing missing values (geom_point).